# coding:utf-8

from BigDataWeb.algorithm import Algorithm
import os


class Preprocessor(Algorithm):
    """Algorithm step that keeps the source data next to a working copy.

    Records the dtype of every selected input field, copies ``data_source``
    into ``new_data`` when the step is run, and can dump both tables to
    Excel files in the work folder.

    NOTE(review): ``data_source`` is presumably a pandas DataFrame (it is
    indexed by column name and offers ``copy``/``to_excel``) -- confirm
    against ``Algorithm``.
    """

    # Class-level defaults. The methods below rebind these on the instance
    # instead of mutating the shared class-level objects in place.
    new_data = None          # working copy of data_source, set by implent()
    subset = []              # field names exported as "#subset#"
    input_field_types = []   # (field name, dtype) pairs

    def setInPutFieldName(self, input_field_names):
        """Set the input fields and record each field's dtype.

        Delegates to ``Algorithm.setInPutFieldName`` first, then rebuilds
        ``input_field_types`` as a list of ``(name, dtype)`` tuples, one per
        entry of ``self.input_field_names``.
        """
        Algorithm.setInPutFieldName(self, input_field_names)
        # Start from a fresh list so earlier selections do not leak through.
        self.input_field_types = []
        collected = self.input_field_types
        for field_name in self.input_field_names:
            column = self.data_source[field_name]
            collected.append((field_name, column.dtype))

    def implent(self):
        """Run the base step, then prepare ``new_data`` and ``subset``.

        ``new_data`` becomes a copy of ``data_source``. ``subset`` is bound
        to the very same object as ``input_field_names`` (no copy is made).
        """
        Algorithm.implent(self)
        working_copy = self.data_source.copy()
        self.new_data = working_copy
        self.subset = self.input_field_names

    def saveToExcle(self):
        """Write the source data and the working copy to Excel files.

        Produces ``raw_data.xlsx`` and ``new_data.xlsx`` inside
        ``work_folder_path``. ``new_data`` is only assigned by ``implent``.
        """
        folder = self.work_folder_path
        raw_target = os.path.join(folder, "raw_data.xlsx")
        new_target = os.path.join(folder, "new_data.xlsx")
        # Both paths are resolved before anything is written to disk.
        self.data_source.to_excel(raw_target)
        self.new_data.to_excel(new_target)

    def prepareIpynbItems(self):
        """Extend the base notebook items with the ``#subset#`` entry."""
        Algorithm.prepareIpynbItems(self)
        chosen_fields = self.subset
        self.ipynb_items["#subset#"] = chosen_fields
